Name - Shreyanshu Batchno- 197
import pandas as pd,numpy as np
df = pd.read_csv(r"C:\Users\sreya\Downloads\dataset.csv")
df
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JTMEB3FV6N | Monroe | Key West | FL | 33040 | 2022 | TOYOTA | RAV4 PRIME | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 42 | 0 | NaN | 198968248 | POINT (-81.80023 24.5545) | NaN | 12087972100 |
| 1 | 1G1RD6E45D | Clark | Laughlin | NV | 89029 | 2013 | CHEVROLET | VOLT | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | NaN | 5204412 | POINT (-114.57245 35.16815) | NaN | 32003005702 |
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 112629 | 7SAYGDEF2N | King | Duvall | WA | 98019 | 2022 | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 45.0 | 217955265 | POINT (-121.98609 47.74068) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032401 |
| 112630 | 1N4BZ1CP7K | San Juan | Friday Harbor | WA | 98250 | 2019 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 150 | 0 | 40.0 | 103663227 | POINT (-123.01648 48.53448) | BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... | 53055960301 |
| 112631 | 1FMCU0KZ4N | King | Vashon | WA | 98070 | 2022 | FORD | ESCAPE | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | 34.0 | 193878387 | POINT (-122.4573 47.44929) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033027702 |
| 112632 | KNDCD3LD4J | King | Covington | WA | 98042 | 2018 | KIA | NIRO | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 47.0 | 125039043 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032007 |
| 112633 | YV4BR0CL8N | King | Covington | WA | 98042 | 2022 | VOLVO | XC90 | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 18 | 0 | 47.0 | 194673692 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032005 |
112634 rows × 17 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112634 entries, 0 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112634 non-null object 1 County 112634 non-null object 2 City 112634 non-null object 3 State 112634 non-null object 4 Postal Code 112634 non-null int64 5 Model Year 112634 non-null int64 6 Make 112634 non-null object 7 Model 112614 non-null object 8 Electric Vehicle Type 112634 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112634 non-null object 10 Electric Range 112634 non-null int64 11 Base MSRP 112634 non-null int64 12 Legislative District 112348 non-null float64 13 DOL Vehicle ID 112634 non-null int64 14 Vehicle Location 112610 non-null object 15 Electric Utility 112191 non-null object 16 2020 Census Tract 112634 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 14.6+ MB
df.isnull().sum()
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 20 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 286 DOL Vehicle ID 0 Vehicle Location 24 Electric Utility 443 2020 Census Tract 0 dtype: int64
# Filling the nullvalues
df["Model"]=df.groupby(["Make"])['Model'].transform(lambda x: x.fillna(x.mode()[0]))
df["Legislative District"]=df.groupby(["County","City","State","Postal Code"])["Legislative District"].transform(lambda x: x.fillna(x.mode()[0]))
df["Electric Utility"]=df.groupby(["Make","Model","Electric Vehicle Type","Clean Alternative Fuel Vehicle (CAFV) Eligibility"])["Electric Utility"].transform(lambda x: x.fillna(x.mode()[0]))
df.dropna(subset=['Vehicle Location'], inplace=True)
df.reset_index(drop=True,inplace=True)
df
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JTMEB3FV6N | Monroe | Key West | FL | 33040 | 2022 | TOYOTA | RAV4 PRIME | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 42 | 0 | NaN | 198968248 | POINT (-81.80023 24.5545) | CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA) | 12087972100 |
| 1 | 1G1RD6E45D | Clark | Laughlin | NV | 89029 | 2013 | CHEVROLET | VOLT | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | NaN | 5204412 | POINT (-114.57245 35.16815) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 32003005702 |
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 112605 | 7SAYGDEF2N | King | Duvall | WA | 98019 | 2022 | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 45.0 | 217955265 | POINT (-121.98609 47.74068) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032401 |
| 112606 | 1N4BZ1CP7K | San Juan | Friday Harbor | WA | 98250 | 2019 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 150 | 0 | 40.0 | 103663227 | POINT (-123.01648 48.53448) | BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... | 53055960301 |
| 112607 | 1FMCU0KZ4N | King | Vashon | WA | 98070 | 2022 | FORD | ESCAPE | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | 34.0 | 193878387 | POINT (-122.4573 47.44929) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033027702 |
| 112608 | KNDCD3LD4J | King | Covington | WA | 98042 | 2018 | KIA | NIRO | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 47.0 | 125039043 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032007 |
| 112609 | YV4BR0CL8N | King | Covington | WA | 98042 | 2022 | VOLVO | XC90 | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 18 | 0 | 47.0 | 194673692 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032005 |
112610 rows × 17 columns
df.isnull().sum()
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 0 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 281 DOL Vehicle ID 0 Vehicle Location 0 Electric Utility 0 2020 Census Tract 0 dtype: int64
df[df.duplicated()]
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract |
|---|
df["Postal Code"].value_counts()
98052 2916
98033 2059
98004 2001
98115 1880
98006 1852
...
98305 1
21701 1
98621 1
84128 1
83876 1
Name: Postal Code, Length: 758, dtype: int64
df["Model Year"].value_counts()
2022 26529 2021 18361 2018 14245 2020 11037 2019 10265 2017 8642 2016 5732 2015 4940 2013 4686 2014 3683 2023 1886 2012 1702 2011 838 2010 24 2008 23 2000 10 1999 3 2002 2 1997 1 1998 1 Name: Model Year, dtype: int64
From above we can see that Postal Code,Model Year and Legislative District are in numerical but they are not the continuous data
df["Legislative District"]=df["Legislative District"].astype("str")
df["Postal Code"]=df["Postal Code"].astype("str")
df["Model Year"]=df["Model Year"].astype("str")
import plotly.express as px
area=df["Model Year"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
x=area.index,
orientation="v",
color=area.index,
text=area.values,
color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
area=df["Make"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
x=area.index,
orientation="v",
color=area.index,
text=area.values,
color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
Vechile_type = df['Electric Vehicle Type'].value_counts()
fig = px.pie(Vechile_type, names=Vechile_type.index, values=Vechile_type.values, title='Pie Chart of Electric Vehicle Type')
fig.show()
fig=px.histogram(df,x="Electric Range",nbins=10)
fig.show()
fig=px.box(df,x="Electric Range")
fig.show()
fig_boxplot = px.box(df, x='Electric Vehicle Type', y='Electric Range', title='Box Plot of Electric Vehicle Type vs. Electric Range')
fig_boxplot.show()
import plotly.graph_objects as go
def create_ev_choropleth_map(df):
# Calculate the count of EV vehicles for each state
ev_count_by_state = df['State'].value_counts().reset_index()
ev_count_by_state.columns = ['State', 'EV Count']
# Create the Choropleth map using plotly.graph_objects
fig_choropleth = go.Figure(data=go.Choropleth(
locations=ev_count_by_state['State'],
z=ev_count_by_state['EV Count'],
locationmode='USA-states',
colorscale='Viridis',
colorbar_title='Number of EV Vehicles',
))
# Set the map title and layout
fig_choropleth.update_layout(
title_text='Choropleth Map of EV Vehicles by State',
geo_scope='usa', # Limit map scope to the USA
)
# Return the Choropleth map figure
return fig_choropleth
# Call the function and display the Choropleth map
fig = create_ev_choropleth_map(df)
fig.show()
import pandas as pd
# Converting the 'Model Year' column to datetime type
df['Model Year'] = pd.to_datetime(df['Model Year'], format='%Y')
# Group by 'Model Year' and 'Make' to get the count of each make for each year
df_grouped = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
import bar_chart_race as bcr
# Pivot the DataFrame to have 'Make' as columns and 'Model Year' as index
df_pivot = df_grouped.pivot(index='Model Year', columns='Make', values='Count')
# Fill missing values using forward fill (pad)
df_pivot = df_pivot.fillna(method='pad')
# Create the Racing Bar Plot
bcr.bar_chart_race(
df=df_pivot,
filename='ev_make_racing_bar_plot.mp4', # Output file name for the animation
orientation='h', # Horizontal bars
sort='desc', # Sort the bars in descending order
n_bars=10, # Number of bars to show
fixed_order=False, # Allow bars to change positions
title='EV Make Racing Bar Plot by Year', # Animation title
label_bars=True, # Show the value label on each bar
period_label={'x': 0.99, 'y': 0.25, 'ha': 'right', 'va': 'center'}, # Position of the year label
period_fmt='%Y', # Format of the year label
)
fig_bar = px.histogram(df,x ='Make',y='Model Year',
animation_frame="Make", animation_group="Model Year",
color_discrete_sequence=px.colors.qualitative.T10)
fig_bar.update_yaxes(showgrid=False),
fig_bar.update_xaxes(categoryorder='total descending')
fig_bar.update_traces(hovertemplate=None)
fig_bar.update_layout(margin=dict(t=70, b=0, l=70, r=40),
hovermode="x unified",
xaxis_tickangle=360,
xaxis_title=' ', yaxis_title=" ",
plot_bgcolor='#2d3035', paper_bgcolor='#2d3035',
title_font=dict(size=25, color='#a5a7ab', family="Lato, sans-serif"),
font=dict(color='#8a8d93'),
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)